# --------------------------------------------------------
# ' Close Elections
# ' Table S1. and S2.
# --------------------------------------------------------
library(here)
library(rio)
library(data.table)
library(maps)
library(stringr)
library(dplyr)
library(tidyr)
library(papeR)
library(kableExtra)
	options(knitr.kable.NA = '')
library(huxtable)

# Root directory of the replication archive; edit this path for your machine.
# NOTE: this character variable has the same name as the here() function from
# the `here` package attached above; only the string is used below.
here <- "~/Dropbox/project-archive/BG-bk/dataverse"

# Load the project helper code.
source(file.path(here, "code", "rdata_utils.R"))

# Read the cleaned regression data set and hold it as a data.table.
data <- data.table(
  import(file.path(here, "data", "processed", "CE_cleaned_reg.dta"))
)

#------------------------------------------------------------------------------
# Appendix Table S1: states with close elections, by year
#==============================================================================
# Cross-tabulate state by year for rows with one == 1 in close elections,
# then reshape to one row per state and one column per year.
sample_size <- with(data[one == 1 & close_election == 1, ], table(state, year))
sample_size <- data.frame(sample_size) %>% spread(key = year, value = Freq)

# Recode the counts to 0/1 indicators. Only the year columns are recoded:
# comparing the whole data frame with `> 0` would also compare the factor
# column `state`, which raises "'>' not meaningful for factors" and only works
# because the resulting NA mask is ignored.
year_cols <- setdiff(names(sample_size), "state")
sample_size[year_cols] <- lapply(
  sample_size[year_cols],
  function(freq) ifelse(freq > 0, 1, 0)
)

# Number of the four election years in which each state appears, then sort
# states from most to fewest.
sample_size$total <- rowSums(sample_size[, c("1992", "2000", "2008", "2016")])
sample_size <- sample_size[order(-sample_size$total), ]

export(sample_size, file.path(here, "output", "table_s1_close_election_state.xlsx"))


#------------------------------------------------------------------------------
# Appendix Table S2
#==============================================================================
# Add 0/1 indicator columns describing the distribution of the outcomes.
# ifelse() is used so that missing values stay missing.

# n_size4: one indicator per value 1..4
data[, c("ns1", "ns2", "ns3", "ns4") :=
  lapply(1:4, function(k) ifelse(n_size4 == k, 1, 0))]

# n_relative: indicators for 0..3, plus a top category for 4 or more
data[, c("nr0", "nr1", "nr2", "nr3") :=
  lapply(0:3, function(k) ifelse(n_relative == k, 1, 0))]
data[, nr4 := ifelse(n_relative >= 4, 1, 0)]

# n_middle: indicators for 0..3, plus a top category for 4 or more
data[, c("nm0", "nm1", "nm2", "nm3") :=
  lapply(0:3, function(k) ifelse(n_middle == k, 1, 0))]
data[, nm4 := ifelse(n_middle >= 4, 1, 0)]

# basic descriptive statistics : use stargazer to report this?
# Column names are grouped by the table section they feed into.

# respondent demographics
demo_variable <- c(
  "r_age", "r_female", "r_educ",
  "r_white", "r_black", "r_hispanic", "r_other",
  "r_married", "r_working"
)

# respondent political attributes
pol_variable <- c("r_pol_interest", "r_party_intensity")

# individual-level outcome variables
DV_ind <- c(
  "n_size4", "isolation",
  "ns1", "ns2", "ns3", "ns4",
  "pol_one_diff",
  "i_relative",
  "i_middle"
)

# election-level variables
election_variable <- c(
  "close_election",
  "n_ad", "p_attack", "p_promote", "p_contrast"
)

# Turn year into a factor and expand the numeric race code into four 0/1
# dummies. All right-hand sides are evaluated on the columns as they are
# before this call, i.e. while r_race is still the numeric code.
data[, `:=`(
  year       = as.factor(year),
  r_white    = ifelse(r_race == 1, 1, 0),
  r_black    = ifelse(r_race == 2, 1, 0),
  r_hispanic = ifelse(r_race == 3, 1, 0),
  r_other    = ifelse(r_race == 4, 1, 0)
)]

# Only after the dummies exist, recode r_race itself into a labelled factor.
data[, r_race := factor(
  r_race,
  levels = c(1, 2, 3, 4),
  labels = c("white", "black", "hispanic", "other")
)]

# Flag the analytic sample: anal_one is 1L for rows with no missing value on
# any listed variable (plus state and year), and 0L otherwise.
nonNA <- complete.cases(
  data[,
    c(demo_variable, pol_variable, election_variable, DV_ind, "state", "year"),
    with = FALSE
  ]
)
data[, anal_one := as.integer(nonNA)]

# Row labels for the summary table. Labels are attached to the summary rows
# purely by position, so each vector must list its labels in exactly the same
# order as the matching variable vector.

# labels for c(demo_variable, pol_variable)
ind_var_label <- c(
  "Age", "Female", "Education",
  "White", "Black", "Hispanic", "Other Race", "Married", "Currently working",
  "Political Interest", "Partisan Intensity"
)

# labels for DV_ind; DV_ind ends with i_relative then i_middle, so the
# "relatives" label has to come before the "middle" label
ind_net_label <- c(
  "Network size", "Isolation", "Size=1", "Size=2", "Size=3", "Size=4+",
  "Cross-cutting exposure",
  "Talking to relatives",
  "Talking to the middle"
)

# labels for election_variable
state_var_label <- c(
  "Close election",
  "Daily Freq of Pol Ads", "P(tone=attack)", "P(tone=promote)", "P(tone=contrast)"
)

# Build the three-column summary (label, "Mean (%)", "SD") for one survey year.
#
# yy: the survey year to summarise; rows are kept when `year == yy`.
#
# Reads the script-level objects `data`, the variable-name vectors
# (demo_variable, pol_variable, DV_ind, election_variable) and the label
# vectors (ind_var_label, ind_net_label, state_var_label).
#
# Returns the respondent-level summary stacked on top of the election-level
# summary, with the first column replaced by the display labels.
generate_summary_table_year <- function(yy) {
  # Rows for the requested year.
  # NOTE(review): anal_one is only ever set to 0L or 1L in this script, so
  # `anal_one >= 0` does not restrict the sample -- confirm this is intended.
  year_rows <- data[one == 1 & anal_one >= 0 & year == yy]

  resp_stats <- summarize(
    year_rows[, c(demo_variable, pol_variable, DV_ind), with = FALSE],
    test = FALSE, quantiles = FALSE
  )
  elec_stats <- summarize(
    year_rows[, election_variable, with = FALSE],
    test = FALSE, quantiles = FALSE
  )

  if (yy == 1992) {
    # For 1992 the election summary is padded with four all-NA rows so that
    # it has one row per election variable, then the rows are renamed.
    elec_stats <- rbind(elec_stats,
      rep(NA, 3),
      rep(NA, 3),
      rep(NA, 3),
      rep(NA, 3))
    elec_stats[, 1] <- election_variable
    colnames(elec_stats)[2] <- "Missing"
  }

  # Positions of the name / mean / SD columns in the election summary; the
  # 2000 summary has them one position further right than the other years.
  elec_cols <- if (yy == 2000) c(1, 5, 6) else c(1, 4, 5)
  elec_stats <- elec_stats[, elec_cols]

  # Name / mean / SD columns of the respondent-level summary.
  resp_stats <- resp_stats[, c(1, 5, 6)]

  stacked <- rbind(resp_stats, elec_stats)
  colnames(stacked) <- c("", "Mean (%)", "SD")
  # Labels are matched to rows by position.
  stacked[, 1] <- c(ind_var_label, ind_net_label, state_var_label)
  stacked
}

# One summary table per survey year; the label column is kept only from the
# first table, the later years contribute just their mean and SD columns.
year_tables <- lapply(c(1992, 2000, 2008, 2016), generate_summary_table_year)
out <- cbind(
  year_tables[[1]],
  year_tables[[2]][, c(2, 3)],
  year_tables[[3]][, c(2, 3)],
  year_tables[[4]][, c(2, 3)]
)

# do not display SD for % measures
# (columns 3, 5, 7 and 9 are the SD columns of the four years)
pct_rows <- c(2, 4:9, 12, 13:21, 23:25)
for (sd_col in c(3, 5, 7, 9)) {
  out[pct_rows, sd_col] <- NA
}
colnames(out)[1] <- ""
rownames(out) <- seq_len(nrow(out))

export(out, file = file.path(here, "output", "table_s2_summary.xlsx"))


